#title: "Data Preparation & Summary Stats"
#author: "Qianhui Li"
# NOTE(review): a hard-coded absolute setwd() makes this script non-portable;
# every relative path below (e.g. read.csv("bank_data.csv")) resolves against
# this directory. Consider an RStudio project or here::here() instead.
setwd("/Users/qianhuili/Desktop/GitHub/AAE724/Script/Data_cleaning")

library(tidyr)   
library(dplyr)   
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(leaps)
library(glmnet)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loading required package: foreach
## Loaded glmnet 2.0-18
library(ggplot2)
library(gmodels)
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(corrplot)
## corrplot 0.84 loaded
library(ISLR)
library(tree)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(ROCR)
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
library(rpart)
library(rpart.plot)
library(rattle)
## Rattle: A free graphical interface for data science with R.
## Version 5.2.0 Copyright (c) 2006-2018 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following object is masked from 'package:gmodels':
## 
##     ci
## The following object is masked from 'package:glmnet':
## 
##     auc
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
library(corrplot)
library(lfe)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(tidyverse)
## Registered S3 method overwritten by 'cli':
##   method     from
##   print.tree tree
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble  2.1.3     ✔ purrr   0.3.2
## ✔ readr   1.3.1     ✔ stringr 1.4.0
## ✔ tibble  2.1.3     ✔ forcats 0.4.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ purrr::accumulate()  masks foreach::accumulate()
## ✖ gridExtra::combine() masks dplyr::combine()
## ✖ Matrix::expand()     masks tidyr::expand()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ Matrix::pack()       masks tidyr::pack()
## ✖ car::recode()        masks dplyr::recode()
## ✖ MASS::select()       masks dplyr::select()
## ✖ purrr::some()        masks car::some()
## ✖ Matrix::unpack()     masks tidyr::unpack()
## ✖ purrr::when()        masks foreach::when()
library(viridis)
## Loading required package: viridisLite
library(RColorBrewer)
library(ggpubr)
## Loading required package: magrittr
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
library(wesanderson)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:MASS':
## 
##     select
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(corrplot)
library(ROSE)
## Loaded ROSE 0.0-3
library(naniar)
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
library(blorr)
library(pROC)
#=============================================

##Data Preparation

# NOTE(review): na.strings contains "non-existent", but the raw data codes the
# poutcome level as "nonexistent" (see the summary below), so that entry
# matches nothing -- confirm the intended spelling against the raw file.
bankoriginal<-read.csv("bank_data.csv",header=TRUE, sep=";", na.strings=c("unknown","non-existent"))
#Check count & percentage of missing values per variable
gg_miss_var(bankoriginal)

gg_miss_var(bankoriginal, show_pct = TRUE)

#"999" in pdays means the client was not previously contacted, so pdays is
#recoded as a dummy: never contacted (999) = 0, previously contacted = 1.
#(The original detour through as.factor() before the comparison was
#unnecessary and fragile; the direct numeric comparison is equivalent.)
bankoriginal$pdays <- ifelse(bankoriginal$pdays == 999, 0, 1)


#The variable with the largest proportion of missing values is "default".
#However, a customer may simply be unwilling to disclose this information to
#the banking representative, so the unknown value in "default" is really a
#separate category rather than a missing one. It is therefore kept, and the
#same reasoning applies to "loan" and "housing".
bankoriginal$default <- as.character(bankoriginal$default)
bankoriginal$default[is.na(bankoriginal$default)] <- "refuse2disclose"

bankoriginal$loan<-as.character(bankoriginal$loan)
bankoriginal$loan[is.na(bankoriginal$loan)] <- "refuse2disclose"

bankoriginal$housing<-as.character(bankoriginal$housing)
bankoriginal$housing[is.na(bankoriginal$housing)] <- "refuse2disclose"
#As indicated by the data contributor, duration is not known before a call is
#performed, and after the call y is obviously known. This input should only be
#used for benchmarking and must be discarded for a realistic predictive model,
#so "duration" is removed here.
#dplyr::select is written explicitly because both MASS and plotly (attached
#above) mask select() -- see the masking messages in the library section.
bankoriginal <- bankoriginal %>%
  dplyr::select(-duration)

#check the missing value graphs again
gg_miss_var(bankoriginal)

gg_miss_var(bankoriginal, show_pct = TRUE)

#omit remaining missing values
bank<-na.omit(bankoriginal)
sum(is.na(bank))
## [1] 0
#Data summary
# Snapshot after recoding and NA removal: 39,191 rows. default/housing/loan
# are still character at this point; they are converted to factors in the
# next section.
summary(bank)
##       age                 job            marital     
##  Min.   :17.00   admin.     :10159   divorced: 4417  
##  1st Qu.:32.00   blue-collar: 8788   married :23748  
##  Median :38.00   technician : 6520   single  :11026  
##  Mean   :39.86   services   : 3814                   
##  3rd Qu.:47.00   management : 2798                   
##  Max.   :98.00   retired    : 1617                   
##                  (Other)    : 5495                   
##                education       default            housing         
##  basic.4y           : 4118   Length:39191       Length:39191      
##  basic.6y           : 2264   Class :character   Class :character  
##  basic.9y           : 6006   Mode  :character   Mode  :character  
##  high.school        : 9464                                        
##  illiterate         :   18                                        
##  professional.course: 5225                                        
##  university.degree  :12096                                        
##      loan                contact          month       day_of_week
##  Length:39191       cellular :24983   may    :13128   fri:7417   
##  Class :character   telephone:14208   jul    : 6767   mon:8107   
##  Mode  :character                     aug    : 5947   thu:8194   
##                                       jun    : 5014   tue:7683   
##                                       nov    : 3973   wed:7790   
##                                       apr    : 2493              
##                                       (Other): 1869              
##     campaign          pdays           previous             poutcome    
##  Min.   : 1.000   Min.   :0.0000   Min.   :0.0000   failure    : 4044  
##  1st Qu.: 1.000   1st Qu.:0.0000   1st Qu.:0.0000   nonexistent:33877  
##  Median : 2.000   Median :0.0000   Median :0.0000   success    : 1270  
##  Mean   : 2.566   Mean   :0.0357   Mean   :0.1704                      
##  3rd Qu.: 3.000   3rd Qu.:0.0000   3rd Qu.:0.0000                      
##  Max.   :56.000   Max.   :1.0000   Max.   :7.0000                      
##                                                                        
##   emp.var.rate      cons.price.idx  cons.conf.idx      euribor3m    
##  Min.   :-3.40000   Min.   :92.20   Min.   :-50.80   Min.   :0.634  
##  1st Qu.:-1.80000   1st Qu.:93.08   1st Qu.:-42.70   1st Qu.:1.344  
##  Median : 1.10000   Median :93.44   Median :-41.80   Median :4.857  
##  Mean   : 0.08324   Mean   :93.57   Mean   :-40.54   Mean   :3.624  
##  3rd Qu.: 1.40000   3rd Qu.:93.99   3rd Qu.:-36.40   3rd Qu.:4.961  
##  Max.   : 1.40000   Max.   :94.77   Max.   :-26.90   Max.   :5.045  
##                                                                     
##   nr.employed     y        
##  Min.   :4964   no :34831  
##  1st Qu.:5099   yes: 4360  
##  Median :5191              
##  Mean   :5167              
##  3rd Qu.:5228              
##  Max.   :5228              
## 
#convert variable types
sapply(bank,class)
##            age            job        marital      education        default 
##      "integer"       "factor"       "factor"       "factor"    "character" 
##        housing           loan        contact          month    day_of_week 
##    "character"    "character"       "factor"       "factor"       "factor" 
##       campaign          pdays       previous       poutcome   emp.var.rate 
##      "integer"      "numeric"      "integer"       "factor"      "numeric" 
## cons.price.idx  cons.conf.idx      euribor3m    nr.employed              y 
##      "numeric"      "numeric"      "numeric"      "numeric"       "factor"
  #numerical variables: coerce the integer counters to double so every numeric
  #predictor shares one type (already-numeric columns are unchanged)
num_cols <- c("age", "campaign", "previous", "emp.var.rate",
              "cons.price.idx", "cons.conf.idx", "euribor3m", "nr.employed")
for (col in num_cols) {
  bank[[col]] <- as.numeric(bank[[col]])
}

  #categorical variables: default/loan/housing were recoded as character above
  #and become factors here; the rest are factors already (no-op)
fac_cols <- c("job", "marital", "education", "default", "loan",
              "housing", "contact", "poutcome", "day_of_week", "month")
for (col in fac_cols) {
  bank[[col]] <- as.factor(bank[[col]])
}

  #response: "yes" -> 1, everything else -> 0, stored as a factor
bank$y <- as.factor(ifelse(bank$y == 'yes', 1, 0))
#Check for outliers for numerical variables
# Boxplots of each numeric predictor split by the response y; the plot objects
# are built first and arranged into panels with ggarrange() below.
p1_age <- ggplot(bank, aes(y, age)) + geom_boxplot(aes(fill = y))

p1_campaign <- ggplot(bank, aes(y, campaign)) + geom_boxplot(aes(fill = y))

p1_previous <- ggplot(bank, aes(y, previous)) + geom_boxplot(aes(fill = y))

p1_emp.var.rate <- ggplot(bank, aes(y, emp.var.rate)) + geom_boxplot(aes(fill = y))

p1_cons.price.idx <- ggplot(bank, aes(y, cons.price.idx)) + geom_boxplot(aes(fill = y))

p1_cons.conf.idx<- ggplot(bank, aes(y, cons.conf.idx)) + geom_boxplot(aes(fill = y))

p1_euribor3m<- ggplot(bank, aes(y, euribor3m)) + geom_boxplot(aes(fill = y))

p1_nr.employed<- ggplot(bank, aes(y, nr.employed)) + geom_boxplot(aes(fill = y))


# NOTE(review): c() on ggplot objects flattens them into a plain list; a1/b1/g1
# are never used in this chunk -- presumably leftovers, kept in case a later
# chunk references them.
a1 <- c(p1_age,p1_campaign)
ggarrange(p1_age,p1_campaign, 
          nrow = 1)

b1 <- c(p1_previous,p1_emp.var.rate,
       p1_cons.price.idx)
ggarrange(p1_previous,p1_emp.var.rate,
          p1_cons.price.idx, 
          nrow = 1)

g1 <- c(p1_cons.conf.idx,
       p1_euribor3m,p1_nr.employed)
ggarrange(p1_cons.conf.idx,p1_euribor3m,p1_nr.employed, 
          nrow = 1)

# Tukey upper fence for age: Q3 + 1.5 * IQR; rows above the fence are treated
# as outliers (applied jointly with the other fences at the end).
#age==38770
x <- bank$age
qnt <- quantile(x, probs=c(.25, .75), na.rm = T)
H <- 1.5 * IQR(x, na.rm = T)
hb <- H + qnt[2]
hb #remove>69.5
##  75% 
## 69.5
# NOTE(review): ab/ac below are computed but never used in this chunk; the
# actual filtering happens in the combined subset at the end.
ab <- bank[which(bank$age<hb),]

# Same upper-fence rule for campaign.
#campaign==35982
x1 <- bank$campaign
qnt1 <- quantile(x1, probs=c(.25, .75), na.rm = T)
H1 <- 1.5 * IQR(x1, na.rm = T)
hb1<- H1 + qnt1[2]
hb1 #remove>6
## 75% 
##   6
ac <- bank[which(bank$campaign<hb1),]



# Same upper-fence rule for cons.conf.idx.
#cons.conf.idx
x5 <- bank$cons.conf.idx 
qnt5 <- quantile(x5, probs=c(.25, .75), na.rm = T)
H5 <- 1.5 * IQR(x5, na.rm = T)
hb5<- H5 + qnt5[2]
hb5 #remove>-26.95
##    75% 
## -26.95
#From the boxplot for "previous", I decided to treat observations larger than 2 as outliers, thus remove them.

# All fences applied at once; note the strict < also drops rows lying exactly
# on a fence.
#Result after removing outliers in numerical variables(34,370obs with 20 variables)
bank <- bank[which(bank$age<hb & bank$campaign<hb1 & bank$previous<2 & bank$cons.conf.idx<hb5),]


#Check for outliers for categorical variables with more than 3 categories

# geom_bar() replaces geom_histogram(stat = 'count'): geom_bar is the correct
# geom for category counts and draws the same plot without the
# "Ignoring unknown parameters: binwidth, bins, pad" warning the old call
# produced.
pic_job1 <- ggplot(bank, aes(x = job)) +
  geom_bar(fill = "slate blue", alpha = 0.5) +
  theme_minimal() +
  theme(plot.title  = element_text(face = "bold", size = 14, hjust = 0.5),
        axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
        axis.text.y = element_text(size = 10)) +
  labs(title = "Job", x = "Job", y = "Counts")
pic_job1

#From the histogram, there is no obvious small number of counts for jobs

pic_edu1 <- ggplot(bank, aes(x = education)) +
  geom_bar(fill = "yellowgreen", alpha = 0.5) +
  theme_minimal() +
  theme(plot.title  = element_text(face = "bold", size = 14, hjust = 0.5),
        axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
        axis.text.y = element_text(size = 10)) +
  labs(title = "Education", x = "Education Status", y = "Counts")
pic_edu1

table(bank$education)
## 
##            basic.4y            basic.6y            basic.9y 
##                3450                2022                5418 
##         high.school          illiterate professional.course 
##                8330                  16                4592 
##   university.degree 
##               10542
#From the histogram, there is one obviously tiny category: "illiterate"
#(16 observations), so those rows are dropped. Note the factor level itself is
#retained (it appears with a zero count in later summaries).
bank <-bank[bank$education!="illiterate",,drop=FALSE]

#After removing outliers for both numerical and categorical variables, there are 34,354 obs with 20 variables.
#Check and adjust data imbalance
counts <- table(bank$y)
barplot(counts,col=c("royalblue3","tomato3"),legend = rownames(counts), main = "Term Deposit")

CrossTable(bank$y)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  34354 
## 
##  
##           |         0 |         1 | 
##           |-----------|-----------|
##           |     30933 |      3421 | 
##           |     0.900 |     0.100 | 
##           |-----------|-----------|
## 
## 
## 
## 
#From the graph and the table, we can see that the dataset is highly imbalanced (90% "no" / 10% "yes").
#Since most machine learning classification algorithms are sensitive to imbalance in the predictor classes,
#I resample the data with ROSE (Random Over-Sampling Examples), which generates
#synthetic observations of the minority class.
#(The original comment claimed SMOTE, but the code below uses ROSE -- a
#related but different resampling technique.)

# NOTE(review): set.seed(88) has no effect on ROSE() below, which sets its own
# RNG state via the seed = 1 argument; kept for any later random steps.
set.seed(88)

balanced_data <- ROSE(y~., data=bank,seed=1)$data
CrossTable(balanced_data$y)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  34354 
## 
##  
##           |         0 |         1 | 
##           |-----------|-----------|
##           |     17245 |     17109 | 
##           |     0.502 |     0.498 | 
##           |-----------|-----------|
## 
## 
## 
## 
counts1 <- table(balanced_data$y)
# Bug fix: the legend previously referenced rownames(counts) -- the
# pre-balancing table -- instead of counts1, the table actually being plotted.
barplot(counts1,col=c("royalblue3","tomato3"),legend = rownames(counts1), main = "Customers' Responses")

#Hence, we now have 17245 (50.2%) "no" responses and 17109 (49.8%) "yes" responses, so the data is balanced.

#=============================================

##Summary Statistics
# NOTE(review): ROSE generates synthetic numeric values, so variables that
# were integer- or dummy-valued before balancing (age, campaign, pdays,
# previous) now take fractional and even negative values -- visible in the
# summary below. Confirm this is acceptable for the downstream models.
summary(balanced_data)
##       age                  job           marital     
##  Min.   : 4.662   admin.     :9514   divorced: 3599  
##  1st Qu.:31.075   blue-collar:6796   married :20133  
##  Median :37.897   technician :5838   single  :10622  
##  Mean   :39.320   services   :3068                   
##  3rd Qu.:46.925   management :2523                   
##  Max.   :82.032   retired    :1362                   
##                   (Other)    :5253                   
##                education                default     
##  basic.4y           : 3045   no             :28726  
##  basic.6y           : 1919   refuse2disclose: 5627  
##  basic.9y           : 4836   yes            :    1  
##  high.school        : 8376                          
##  illiterate         :    0                          
##  professional.course: 4668                          
##  university.degree  :11510                          
##             housing                   loan            contact     
##  no             :15248   no             :28474   cellular :24410  
##  refuse2disclose:  798   refuse2disclose:  798   telephone: 9944  
##  yes            :18308   yes            : 5082                    
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##      month      day_of_week    campaign           pdays          
##  may    :9978   fri:6312    Min.   :-0.8292   Min.   :-0.628708  
##  jul    :5427   mon:6892    1st Qu.: 1.0059   1st Qu.:-0.041724  
##  aug    :4872   thu:7456    Median : 1.6486   Median : 0.005368  
##  jun    :4415   tue:6744    Mean   : 1.8994   Mean   : 0.064230  
##  nov    :3463   wed:6950    3rd Qu.: 2.5750   3rd Qu.: 0.061367  
##  apr    :3275               Max.   : 6.8893   Max.   : 1.547210  
##  (Other):2924                                                    
##     previous               poutcome      emp.var.rate     cons.price.idx 
##  Min.   :-0.80396   failure    : 3312   Min.   :-6.0150   Min.   :91.13  
##  1st Qu.:-0.08213   nonexistent:28827   1st Qu.:-1.8975   1st Qu.:92.98  
##  Median : 0.03874   success    : 2215   Median :-0.1352   Median :93.48  
##  Mean   : 0.16018                       Mean   :-0.3556   Mean   :93.48  
##  3rd Qu.: 0.20029                       3rd Qu.: 1.2431   3rd Qu.:94.00  
##  Max.   : 1.67241                       Max.   : 4.2551   Max.   :95.76  
##                                                                          
##  cons.conf.idx      euribor3m       nr.employed   y        
##  Min.   :-59.00   Min.   :-2.123   Min.   :4834   0:17245  
##  1st Qu.:-44.75   1st Qu.: 1.254   1st Qu.:5083   1:17109  
##  Median :-41.17   Median : 3.658   Median :5167            
##  Mean   :-40.77   Mean   : 3.135   Mean   :5146            
##  3rd Qu.:-36.68   3rd Qu.: 4.901   3rd Qu.:5217            
##  Max.   :-17.42   Max.   : 8.384   Max.   :5381            
## 
  #categorical variables exploration

# Helper: bar chart of category counts for one column. Replaces the eleven
# copy-pasted geom_histogram(stat = 'count') calls, which emitted
# "Ignoring unknown parameters: binwidth, bins, pad" warnings -- geom_bar()
# is the correct geom for categorical counts and draws the identical plot.
count_plot <- function(data, var, fill, title, xlab) {
  ggplot(data, aes(x = .data[[var]])) +
    geom_bar(fill = fill, alpha = 0.5) +
    theme_minimal() +
    theme(plot.title  = element_text(face = "bold", size = 14, hjust = 0.5),
          axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
          axis.text.y = element_text(size = 10)) +
    labs(title = title, x = xlab, y = "Counts")
}

# Helper: dodged bar chart of one column's counts split by the response y.
count_by_y <- function(data, var) {
  ggplot(data, aes(x = .data[[var]], fill = y)) +
    geom_bar(position = "dodge")
}

pic_job <- count_plot(balanced_data, "job", "slate blue", "Job", "Job")
pic_job

  #The graph shows that a lot of customers work in the administrative sector, and the fewest as entrepreneurs.

aa <- count_by_y(balanced_data, "job")
aa

  #The graph shows that customers who are admin, retired, or technician are more willing to accept the offer.
#\\\\\\
pic_marital <- count_plot(balanced_data, "marital", "light pink", "Marital", "Marital Status")
pic_marital

bb <- count_by_y(balanced_data, "marital")
bb

#\\\\\\

pic_edu <- count_plot(balanced_data, "education", "yellowgreen", "Education", "Education Status")
pic_edu

cc <- count_by_y(balanced_data, "education")
cc

#\\\\\\

pic_default <- count_plot(balanced_data, "default", "light blue", "Default", "Default Status")
pic_default

dd <- count_by_y(balanced_data, "default")
dd

#\\\\\\

pic_loan <- count_plot(balanced_data, "loan", "orange1", "Loan", "Loan Status")
pic_loan

ee <- count_by_y(balanced_data, "loan")
ee

#\\\\\\
pic_housing <- count_plot(balanced_data, "housing", "grey69", "Housing", "Housing Status")
pic_housing

ff <- count_by_y(balanced_data, "housing")
ff

#\\\\\\
pic_contact <- count_plot(balanced_data, "contact", "firebrick", "Contact", "Contact Approach")
pic_contact

gg <- count_by_y(balanced_data, "contact")
gg

#\\\\\\
pic_poutcome <- count_plot(balanced_data, "poutcome", "yellow1", "poutcome", "Previous Outcome")
pic_poutcome

hh <- count_by_y(balanced_data, "poutcome")
hh

#\\\\\\
pic_dow <- count_plot(balanced_data, "day_of_week", "turquoise4", "Day of Week", "Day of Week")
pic_dow

jj <- count_by_y(balanced_data, "day_of_week")
jj

#\\\\\\
pic_month <- count_plot(balanced_data, "month", "darkseagreen4", "Month", "Months")
pic_month

kk <- count_by_y(balanced_data, "month")
kk

#\\\\\\
#response variable
pic_y <- count_plot(balanced_data, "y", "red", "Subscribe or not", "Subscription")
pic_y

# Class balance of the response after ROSE resampling: close to 50/50.
CrossTable(balanced_data$y)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  34354 
## 
##  
##           |         0 |         1 | 
##           |-----------|-----------|
##           |     17245 |     17109 | 
##           |     0.502 |     0.498 | 
##           |-----------|-----------|
## 
## 
## 
## 
  #numerical variables exploration
p_age <- ggplot(balanced_data, aes(y, age)) + geom_boxplot(aes(fill = y))
# Density histogram of age with vertical lines at the mean and median.
hist(balanced_data$age, col = "yellow2", freq = FALSE)
abline(v = mean(balanced_data$age),
       col = "royalblue",
       lwd = 2)
abline(v = median(balanced_data$age),
       col = "light pink",
       lwd = 2)
legend(x = "topright", 
       c("Density plot", "Mean", "Median"),
       col = c("yellow2", "royalblue", "light pink"),
       lwd = c(2, 2, 2))

#The distribution shows that most customers observed are less than 40 years old.


p_campaign <- ggplot(balanced_data, aes(y, campaign)) + geom_boxplot(aes(fill = y))

p_pdays <- ggplot(balanced_data, aes(y, pdays)) + geom_boxplot(aes(fill = y))
p_pdays

p_previous <- ggplot(balanced_data, aes(y, previous)) + geom_boxplot(aes(fill = y))
p_previous

p_emp.var.rate <- ggplot(balanced_data, aes(y, emp.var.rate)) + geom_boxplot(aes(fill = y))


p_cons.price.idx <- ggplot(balanced_data, aes(y, cons.price.idx)) + geom_boxplot(aes(fill = y))


p_cons.conf.idx<- ggplot(balanced_data, aes(y, cons.conf.idx)) + geom_boxplot(aes(fill = y))


p_euribor3m<- ggplot(balanced_data, aes(y, euribor3m)) + geom_boxplot(aes(fill = y))


p_nr.employed<- ggplot(balanced_data, aes(y, nr.employed)) + geom_boxplot(aes(fill = y))


# NOTE(review): c() on ggplot objects flattens them into plain lists; a/b/g
# are not used below but are kept in case a later chunk references them.
a <- c(p_age,p_campaign,p_pdays)
ggarrange(p_age,p_campaign, 
          nrow = 1)

b <- c(p_previous,p_emp.var.rate,
       p_cons.price.idx)
ggarrange(p_previous,p_emp.var.rate,
          p_cons.price.idx, 
          nrow = 1)

g <- c(p_cons.conf.idx,
       p_euribor3m,p_nr.employed)
ggarrange(p_cons.conf.idx,p_euribor3m,p_nr.employed, 
          nrow = 1)

# Bug fix: "previous" was listed twice in the original select vector, which
# duplicated the column and injected a spurious perfectly-correlated pair
# into pairs() and cor() below.
numericdata <- subset(balanced_data,
                      select = c("age", "campaign", "previous", "emp.var.rate",
                                 "cons.price.idx", "cons.conf.idx",
                                 "euribor3m", "nr.employed", "pdays"))

pairs(numericdata)

M <- cor(numericdata)
corrplot(M, method = "circle")

#or view the correlation magnitudes as numbers
corrplot(M, method = "number")

#From the correlation plot, we can see strong correlations between 'cons.price.idx' & 'emp.var.rate', 'cons.conf.idx' & 'emp.var.rate', 'cons.conf.idx' & 'cons.price.idx', 'cons.price.idx' & 'nr.employed', 'cons.conf.idx' & 'nr.employed', 'emp.var.rate' & 'nr.employed', and 'nr.employed' & 'euribor3m'.
 #Such multicollinearity may not hurt pure prediction but does affect causal inference on the coefficients.
#=============================================
#Data Split

# 50/50 stratified split on the response y via caret::createDataPartition.
# NOTE(review): reproducibility of this split depends on the RNG state left by
# the preceding chunk; consider an explicit set.seed() immediately before it.
index <- createDataPartition(balanced_data$y, p = 0.5, list = FALSE)
train_data <- balanced_data[index, ]
test_data  <- balanced_data[-index, ]



# Possible extension (currently disabled): one-hot encode the categorical
# variables into numeric dummies with caret::dummyVars.
#dmy <- dummyVars(" ~ .", data = balanced_data)
#bank.dummies<- data.frame(predict(dmy, newdata = balanced_data))
#print(bank.dummies)


#===========================================================
#Regressions


###logistic
# Logistic regression of the response on all remaining predictors, fit on the
# training half of the balanced data.
# NOTE(review): the printed summary reports "1 not defined because of
# singularities" -- one predictor level is linearly dependent on the others
# (possibly loan/housing "refuse2disclose", which have identical counts of
# 798 in the summary above -- verify and consider dropping or combining).
logit_model <- glm(y ~.,family=binomial(link='logit'),data =train_data)
summary(logit_model)
## 
## Call:
## glm(formula = y ~ ., family = binomial(link = "logit"), data = train_data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.3508  -0.9364  -0.5245   0.9431   2.2262  
## 
## Coefficients: (1 not defined because of singularities)
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                   2.081e+01  4.952e+00   4.202 2.64e-05 ***
## age                          -8.984e-05  1.932e-03  -0.046 0.962914    
## jobblue-collar                2.047e-02  6.616e-02   0.309 0.757005    
## jobentrepreneur               1.026e-01  9.851e-02   1.042 0.297406    
## jobhousemaid                  6.858e-02  1.268e-01   0.541 0.588557    
## jobmanagement                -6.140e-02  7.384e-02  -0.831 0.405702    
## jobretired                    2.440e-01  1.083e-01   2.253 0.024283 *  
## jobself-employed              2.123e-02  9.545e-02   0.222 0.824008    
## jobservices                   5.351e-02  6.977e-02   0.767 0.443138    
## jobstudent                    5.663e-01  1.310e-01   4.323 1.54e-05 ***
## jobtechnician                 4.112e-02  6.123e-02   0.672 0.501829    
## jobunemployed                -8.601e-02  1.158e-01  -0.743 0.457557    
## maritalmarried                1.932e-02  5.808e-02   0.333 0.739418    
## maritalsingle                 2.889e-02  6.498e-02   0.445 0.656666    
## educationbasic.6y             2.242e-01  9.402e-02   2.385 0.017084 *  
## educationbasic.9y             5.266e-02  7.714e-02   0.683 0.494782    
## educationhigh.school          1.504e-01  7.988e-02   1.882 0.059802 .  
## educationprofessional.course  1.636e-01  8.853e-02   1.847 0.064685 .  
## educationuniversity.degree    3.087e-01  8.106e-02   3.808 0.000140 ***
## defaultrefuse2disclose       -2.201e-01  4.953e-02  -4.443 8.86e-06 ***
## housingrefuse2disclose       -2.916e-01  1.142e-01  -2.554 0.010650 *  
## housingyes                   -1.092e-01  3.573e-02  -3.058 0.002231 ** 
## loanrefuse2disclose                  NA         NA      NA       NA    
## loanyes                       2.690e-02  4.921e-02   0.547 0.584602    
## contacttelephone             -6.221e-01  6.255e-02  -9.946  < 2e-16 ***
## monthaug                     -3.671e-01  9.993e-02  -3.674 0.000239 ***
## monthdec                      8.460e-01  3.141e-01   2.693 0.007072 ** 
## monthjul                      1.022e-01  8.570e-02   1.193 0.232830    
## monthjun                      2.056e-01  8.419e-02   2.443 0.014580 *  
## monthmar                      1.463e+00  1.643e-01   8.904  < 2e-16 ***
## monthmay                     -6.175e-01  6.864e-02  -8.997  < 2e-16 ***
## monthnov                     -4.608e-01  8.642e-02  -5.332 9.73e-08 ***
## monthoct                      2.038e+00  2.359e-01   8.641  < 2e-16 ***
## monthsep                     -1.534e-01  1.670e-01  -0.919 0.358274    
## day_of_weekmon               -1.874e-01  5.651e-02  -3.317 0.000909 ***
## day_of_weekthu                8.685e-02  5.552e-02   1.564 0.117753    
## day_of_weektue               -3.579e-02  5.679e-02  -0.630 0.528485    
## day_of_weekwed                7.889e-02  5.604e-02   1.408 0.159239    
## campaign                     -6.702e-03  1.458e-02  -0.460 0.645673    
## pdays                        -9.938e-03  1.558e-01  -0.064 0.949128    
## previous                     -1.772e-01  1.035e-01  -1.712 0.086953 .  
## poutcomenonexistent           2.709e-01  1.192e-01   2.272 0.023064 *  
## poutcomesuccess               2.139e+00  2.129e-01  10.044  < 2e-16 ***
## emp.var.rate                 -1.177e-01  2.112e-02  -5.573 2.51e-08 ***
## cons.price.idx                4.490e-02  4.467e-02   1.005 0.314859    
## cons.conf.idx                 2.984e-02  4.826e-03   6.183 6.30e-10 ***
## euribor3m                    -9.722e-02  1.880e-02  -5.170 2.34e-07 ***
## nr.employed                  -4.575e-03  4.007e-04 -11.419  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 23813  on 17177  degrees of freedom
## Residual deviance: 19390  on 17131  degrees of freedom
## AIC: 19484
## 
## Number of Fisher Scoring iterations: 5
# Sequential (Type I) likelihood-ratio tests: each term's deviance reduction
# is assessed in the order the terms enter the formula.
anova(logit_model, test="Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: y
## 
## Terms added sequentially (first to last)
## 
## 
##                Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                           17177      23814              
## age             1     8.26     17176      23805  0.004064 ** 
## job            10   429.62     17166      23376 < 2.2e-16 ***
## marital         2    53.54     17164      23322 2.366e-12 ***
## education       5    92.92     17159      23229 < 2.2e-16 ***
## default         1   289.77     17158      22939 < 2.2e-16 ***
## housing         2     3.76     17156      22936  0.152368    
## loan            1     1.00     17155      22935  0.316613    
## contact         1   747.52     17154      22187 < 2.2e-16 ***
## month           9  1264.02     17145      20923 < 2.2e-16 ***
## day_of_week     4    31.46     17141      20892 2.462e-06 ***
## campaign        1     4.86     17140      20887  0.027471 *  
## pdays           1   428.49     17139      20458 < 2.2e-16 ***
## previous        1     0.10     17138      20458  0.747511    
## poutcome        2   160.54     17136      20298 < 2.2e-16 ***
## emp.var.rate    1   590.88     17135      19707 < 2.2e-16 ***
## cons.price.idx  1     8.37     17134      19698  0.003822 ** 
## cons.conf.idx   1    45.11     17133      19653 1.867e-11 ***
## euribor3m       1   129.84     17132      19523 < 2.2e-16 ***
## nr.employed     1   133.24     17131      19390 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  #confusion matrix for train
  # FIX: predict.glm takes `newdata =`, not `data =`; with `data =` the
  # argument is silently swallowed by `...` and the training-set fitted
  # values are returned. For the training set the result coincides, but use
  # the correct argument name for consistency with the test block below.
  log.pred.train <-predict(logit_model,newdata=train_data,type="response")
  # Classify with a 0.5 probability cutoff.
  log.pred1.train <-ifelse(log.pred.train>0.5,1,0)
  log.confusion.matrix.train <-table(log.pred1.train,train_data$y)
  log.confusion.matrix.train
##                
## log.pred1.train    0    1
##               0 6985 3298
##               1 1638 5257
  # Training accuracy = share of correctly classified observations
  # (diagonal of the confusion matrix over the total).
  log.accuracy.train=sum(diag(log.confusion.matrix.train))/sum(log.confusion.matrix.train)
  log.accuracy.train
## [1] 0.7126557
  #confusion matrix for test
  # BUG FIX: the original call used `data = test_data`, but predict.glm only
  # honours `newdata =`; `data` was ignored, so the vector returned was the
  # training-set fitted values, not test-set predictions. That is why the
  # original knit emitted "longer object length is not a multiple of shorter
  # object length" warnings when comparing against test_data$y, and why the
  # reported test "accuracy" (0.7125) was computed on recycled training
  # predictions and is not a valid out-of-sample estimate.
  log.pred.test <-predict(logit_model,newdata=test_data,type="response")
  log.pred1.test <-ifelse(log.pred.test>0.5,1,0)
  error1 <-mean(log.pred1.test !=test_data$y)
  print(paste('Accuracy',1-error1))
  #AUC-ROC curve(later decide whether to keep it or not)
  #par(mfrow=c(1,2))
  #pred <- prediction(log.pred.train, train_data$y) 
  #perf <- performance(pred,"tpr","fpr")
  #plot(perf, main = "ROC for Logistic with training data", col='darkslategray3')
  #abline(0,1)
  #pred1 <- prediction(log.pred.test, test_data$y) 
  #perf1 <- performance(pred1,"tpr","fpr")  # was performance(log.pred.test, ...): performance() takes the prediction object built above
  #plot(perf1, main = "ROC for logistic with test data", col='darkslategray3')
  #abline(0,1)
  
 # AUC.log.train<- auc(roc(train_data$y,log.pred1.train))
  #AUC.log.train
  
  #AUC.log.test<- auc(roc(test_data$y,log.pred1.test))
  #AUC.log.test

###Decision tree
# CART classification tree on the same training half, all covariates.
tree_model <- rpart(y ~ ., data = train_data,method="class")
tree_model
## n= 17178 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
##  1) root 17178 8555 0 (0.50197928 0.49802072)  
##    2) pdays< 0.08583835 13731 5299 0 (0.61408492 0.38591508)  
##      4) pdays>=-0.08703113 11425 3116 0 (0.72726477 0.27273523)  
##        8) euribor3m>=3.128397 7282 1215 0 (0.83315023 0.16684977) *
##        9) euribor3m< 3.128397 4143 1901 0 (0.54115375 0.45884625)  
##         18) month=apr,may,nov 2892 1045 0 (0.63865837 0.36134163)  
##           36) nr.employed>=5052.667 2498  765 0 (0.69375500 0.30624500) *
##           37) nr.employed< 5052.667 394  114 1 (0.28934010 0.71065990) *
##         19) month=aug,dec,jul,jun,mar,oct,sep 1251  395 1 (0.31574740 0.68425260) *
##      5) pdays< -0.08703113 2306  123 1 (0.05333912 0.94666088) *
##    3) pdays>=0.08583835 3447  191 1 (0.05541050 0.94458950) *
# Draw the (unpruned) tree with rattle's annotated plot.
fancyRpartPlot(tree_model)

#predict train
predictions <- predict(tree_model, train_data, type = "class")

#confusion matrix train
# Proportions of the whole training set falling in each (predicted, actual)
# cell; the diagonal holds the correctly classified shares.
tree.confusion.matrix.train <- prop.table(table(predictions, train_data$y))
tree.confusion.matrix.train
##            
## predictions          0          1
##           0 0.45406916 0.11526371
##           1 0.04791012 0.38275701
# Cross-tabulation of actual vs predicted, with counts and table proportions.
CrossTable(train_data$y, predictions,
           prop.chisq = FALSE, prop.c = FALSE, prop.r = FALSE,
           dnn = c('actual subscription status', 'predicted subscription status'))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  17178 
## 
##  
##                            | predicted subscription status 
## actual subscription status |         0 |         1 | Row Total | 
## ---------------------------|-----------|-----------|-----------|
##                          0 |      7800 |       823 |      8623 | 
##                            |     0.454 |     0.048 |           | 
## ---------------------------|-----------|-----------|-----------|
##                          1 |      1980 |      6575 |      8555 | 
##                            |     0.115 |     0.383 |           | 
## ---------------------------|-----------|-----------|-----------|
##               Column Total |      9780 |      7398 |     17178 | 
## ---------------------------|-----------|-----------|-----------|
## 
## 
  #train accuracy
# Training accuracy = sum of the diagonal (correct) cells of the
# proportion table.
tree.accuracy.train=sum(diag(tree.confusion.matrix.train))/sum(tree.confusion.matrix.train)
tree.accuracy.train
## [1] 0.8368262
#predict test
# Class predictions of the unpruned tree on the held-out half.
cart_pred <- predict(tree_model , test_data,type="class")



  # Confusion matrix for test
tree.confusion.matrix.test <- prop.table(table(cart_pred, test_data$y))
tree.confusion.matrix.test
##          
## cart_pred         0         1
##         0 0.4506870 0.1103284
##         1 0.0512925 0.3876921
#test accuracy
tree.accuracy.test=sum(diag(tree.confusion.matrix.test))/sum(tree.confusion.matrix.test)
tree.accuracy.test
## [1] 0.8383791
  # Cross table validation for test
CrossTable(test_data$y, cart_pred,
           prop.chisq = FALSE, prop.c = FALSE, prop.r = FALSE,
           dnn = c('actual customers responses', 'predicted customers responses'))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  17176 
## 
##  
##                            | predicted customers responses 
## actual customers responses |         0 |         1 | Row Total | 
## ---------------------------|-----------|-----------|-----------|
##                          0 |      7741 |       881 |      8622 | 
##                            |     0.451 |     0.051 |           | 
## ---------------------------|-----------|-----------|-----------|
##                          1 |      1895 |      6659 |      8554 | 
##                            |     0.110 |     0.388 |           | 
## ---------------------------|-----------|-----------|-----------|
##               Column Total |      9636 |      7540 |     17176 | 
## ---------------------------|-----------|-----------|-----------|
## 
## 
##prune tree
set.seed(123)

# Cross-validation results for each complexity-parameter (CP) value.
printcp(tree_model)
## 
## Classification tree:
## rpart(formula = y ~ ., data = train_data, method = "class")
## 
## Variables actually used in tree construction:
## [1] euribor3m   month       nr.employed pdays      
## 
## Root node error: 8555/17178 = 0.49802
## 
## n= 17178 
## 
##         CP nsplit rel error  xerror      xstd
## 1 0.358270      0   1.00000 1.00807 0.0076601
## 2 0.240795      1   0.64173 0.64243 0.0071462
## 3 0.026943      2   0.40094 0.40245 0.0061331
## 4 0.019404      4   0.34705 0.35523 0.0058461
## 5 0.010000      5   0.32764 0.33372 0.0057031
plotcp(tree_model)

# CP value with the minimum cross-validated error (xerror).
tree_model$cptable[which.min(tree_model$cptable[,"xerror"]),"CP"]
## [1] 0.01
bestcp <- tree_model$cptable[which.min(tree_model$cptable[,"xerror"]),"CP"]
tree.pruned <- prune(tree_model, cp = bestcp)

fancyRpartPlot(tree.pruned)

# Compute the train accuracy of the pruned tree
# BUG FIX: the original stored the predictions as new columns
# train_data$pred / test_data$pred. Those columns then become predictors in
# every later `y ~ .` model fitted on these data frames (e.g. the nnet
# train() call further down), leaking the tree's own predictions into
# subsequent models. Keep the predictions in local vectors instead; the
# accuracies and confusion tables are unchanged.
pred_prun_train <- predict(tree.pruned, train_data, type = "class")
accuracy_prun_train <- mean(pred_prun_train == train_data$y)
accuracy_prun_train
## [1] 0.8368262
pruned.confusion.matrix.train <- prop.table(table(pred_prun_train, train_data$y))
pruned.confusion.matrix.train
##    
##              0          1
##   0 0.45406916 0.11526371
##   1 0.04791012 0.38275701
# Compute the test accuracy of the pruned tree
pred_prun_test <- predict(tree.pruned, test_data, type = "class")
accuracy_prune_test <- mean(pred_prun_test == test_data$y)
accuracy_prune_test
## [1] 0.8383791
pruned.confusion.matrix.test <- prop.table(table(pred_prun_test, test_data$y))
pruned.confusion.matrix.test
##    
##             0         1
##   0 0.4506870 0.1103284
##   1 0.0512925 0.3876921
#The tree after being pruned is the same as before

#AUC-ROC
#par(mfrow=c(1,2))
#pred3 <- prediction(as.numeric(predictions), as.numeric(train_data$y)) 
#perf3 <- performance(pred3,"tpr","fpr")
#plot(perf3,main = "ROC for Tree with training data", col='darkslategray3')
#abline(0,1)
#pred4 <- prediction(as.numeric(cart_pred), as.numeric(test_data$y)) 
#perf4 <- performance(pred4,"tpr","fpr")
#plot(perf4,main = "ROC for Tree with test data", col='darkslategray3')
#abline(0,1)

#auc_tree_train<- auc(roc(as.numeric(train_data$y), as.numeric(predictions)))
#auc_tree_train

#auc_tree_test <- auc(roc(as.numeric(test_data$y), as.numeric(cart_pred)))
#auc_tree_test 

#==============================================================

#neural nets
library(nnet)
library(NeuralNetTools)
library(neuralnet)
## 
## Attaching package: 'neuralnet'
## The following object is masked from 'package:ROCR':
## 
##     prediction
## The following object is masked from 'package:dplyr':
## 
##     compute
set.seed(888)
# Fit a single-hidden-layer neural network via the train() wrapper, which
# tunes size/decay over its default grid with bootstrap resampling.
# NOTE(review): train() is from caret, which is not attached in the setup
# chunk shown at the top of this file — confirm it is loaded earlier.
# NOTE(review): if train_data still carries the tree-prediction column
# `pred` added in the pruning section, `y ~ .` will include it as a
# predictor and leak the tree's output into this model — verify.
nn <- train(y ~ .,
                  data = train_data,
                  method = "nnet")
## # weights:  53
## initial  value 13371.280674 
## final  value 11904.853547 
## converged
## # weights:  157
## initial  value 11922.455211 
## final  value 11904.853547 
## converged
## # weights:  261
## initial  value 12538.436480 
## final  value 11904.853547 
## converged
## # weights:  53
## initial  value 11926.377996 
## iter  10 value 11903.523664
## iter  20 value 11403.173730
## iter  30 value 10592.551849
## iter  40 value 8822.282665
## iter  50 value 7481.891610
## iter  60 value 7093.933701
## iter  70 value 7043.888673
## iter  80 value 7036.264710
## iter  90 value 7032.114368
## iter 100 value 7002.851963
## final  value 7002.851963 
## stopped after 100 iterations
## # weights:  157
## initial  value 11923.059630 
## iter  10 value 11904.857099
## iter  20 value 11884.068833
## iter  30 value 10841.625259
## iter  40 value 8556.689789
## iter  50 value 7330.485416
## iter  60 value 7192.403274
## iter  70 value 7152.962919
## iter  80 value 7104.814746
## iter  90 value 7070.634083
## iter 100 value 7030.794184
## final  value 7030.794184 
## stopped after 100 iterations
## # weights:  261
## initial  value 16397.080413 
## iter  10 value 11904.867274
## iter  20 value 11904.780955
## iter  30 value 11902.511036
## iter  40 value 11527.437450
## iter  50 value 10922.375413
## iter  60 value 10835.814753
## iter  70 value 9628.930987
## iter  80 value 7412.770301
## iter  90 value 7155.281641
## iter 100 value 7012.390201
## final  value 7012.390201 
## stopped after 100 iterations
## # weights:  53
## initial  value 12051.385954 
## final  value 11904.854532 
## converged
## # weights:  157
## initial  value 11921.087966 
## final  value 11904.856192 
## converged
## # weights:  261
## initial  value 13462.633566 
## final  value 11904.858062 
## converged
## # weights:  53
## initial  value 12438.145104 
## final  value 11905.549249 
## converged
## # weights:  157
## initial  value 12899.458187 
## final  value 11905.549249 
## converged
## # weights:  261
## initial  value 14101.505302 
## final  value 11905.549249 
## converged
## # weights:  53
## initial  value 13247.038648 
## iter  10 value 11905.035835
## iter  20 value 11890.711499
## iter  30 value 11814.584646
## iter  40 value 11637.972564
## iter  50 value 11242.934602
## iter  60 value 10769.400613
## iter  70 value 10149.245549
## iter  80 value 8406.985171
## iter  90 value 7751.012655
## iter 100 value 7348.360292
## final  value 7348.360292 
## stopped after 100 iterations
## # weights:  157
## initial  value 12797.103858 
## iter  10 value 11905.543517
## iter  20 value 11904.898617
## iter  30 value 11577.377113
## iter  40 value 11302.563479
## iter  50 value 11068.084614
## iter  60 value 10375.662490
## iter  70 value 9608.801945
## iter  80 value 8720.792357
## iter  90 value 7677.054674
## iter 100 value 7594.182718
## final  value 7594.182718 
## stopped after 100 iterations
## # weights:  261
## initial  value 12197.265452 
## iter  10 value 11900.703103
## iter  20 value 11552.170856
## iter  30 value 10845.628190
## iter  40 value 10449.540319
## iter  50 value 8909.638226
## iter  60 value 8069.561773
## iter  70 value 7612.100487
## iter  80 value 7362.173558
## iter  90 value 7279.984175
## iter 100 value 7247.870234
## final  value 7247.870234 
## stopped after 100 iterations
## # weights:  53
## initial  value 12438.077656 
## final  value 11905.550095 
## converged
## # weights:  157
## initial  value 11968.328805 
## final  value 11905.552126 
## converged
## # weights:  261
## initial  value 12745.245106 
## final  value 11905.553257 
## converged
## # weights:  53
## initial  value 12770.846520 
## final  value 11906.278698 
## converged
## # weights:  157
## initial  value 11932.748834 
## final  value 11906.278698 
## converged
## # weights:  261
## initial  value 12667.944088 
## final  value 11906.278698 
## converged
## # weights:  53
## initial  value 12833.077204 
## final  value 11906.279243 
## converged
## # weights:  157
## initial  value 12290.772490 
## iter  10 value 11906.281718
## iter  20 value 11906.107182
## iter  30 value 10457.810517
## iter  40 value 10106.838512
## iter  50 value 9184.360300
## iter  60 value 8990.970320
## iter  70 value 7895.208422
## iter  80 value 7421.777703
## iter  90 value 7166.124354
## iter 100 value 7085.873797
## final  value 7085.873797 
## stopped after 100 iterations
## # weights:  261
## initial  value 16828.059567 
## iter  10 value 11906.110222
## iter  20 value 11891.285568
## iter  30 value 11848.564417
## iter  40 value 10076.026511
## iter  50 value 7912.376697
## iter  60 value 7520.351081
## iter  70 value 7239.962623
## iter  80 value 7065.978303
## iter  90 value 6982.235930
## iter 100 value 6935.806907
## final  value 6935.806907 
## stopped after 100 iterations
## # weights:  53
## initial  value 12450.613400 
## final  value 11906.279343 
## converged
## # weights:  157
## initial  value 11919.673642 
## final  value 11906.281264 
## converged
## # weights:  261
## initial  value 11920.101347 
## final  value 11906.282925 
## converged
## # weights:  53
## initial  value 12032.192383 
## final  value 11906.731377 
## converged
## # weights:  157
## initial  value 13057.775733 
## final  value 11906.731377 
## converged
## # weights:  261
## initial  value 11918.950550 
## final  value 11906.731377 
## converged
## # weights:  53
## initial  value 11955.986279 
## iter  10 value 11906.579524
## iter  20 value 11859.408545
## iter  30 value 10584.555385
## iter  40 value 9507.106943
## iter  50 value 8470.370494
## iter  60 value 7495.951733
## iter  70 value 7094.722711
## iter  80 value 7043.442156
## iter  90 value 7024.056373
## iter 100 value 7015.136803
## final  value 7015.136803 
## stopped after 100 iterations
## # weights:  157
## initial  value 12128.318771 
## iter  10 value 11906.582219
## iter  20 value 11868.447862
## iter  30 value 10901.820722
## iter  40 value 9631.237721
## iter  50 value 8475.922734
## iter  60 value 7680.412082
## iter  70 value 7225.264064
## iter  80 value 6933.382715
## iter  90 value 6795.254967
## iter 100 value 6711.636656
## final  value 6711.636656 
## stopped after 100 iterations
## # weights:  261
## initial  value 12246.003305 
## iter  10 value 11903.671350
## iter  20 value 11821.106192
## iter  30 value 11024.470587
## iter  40 value 9236.008682
## iter  50 value 8155.009940
## iter  60 value 7282.918018
## iter  70 value 7192.281481
## iter  80 value 7122.010104
## iter  90 value 7071.315239
## iter 100 value 7006.721210
## final  value 7006.721210 
## stopped after 100 iterations
## # weights:  53
## initial  value 13566.927379 
## final  value 11906.732306 
## converged
## # weights:  157
## initial  value 11909.576728 
## final  value 11906.733873 
## converged
## # weights:  261
## initial  value 11926.223994 
## final  value 11906.735731 
## converged
## # weights:  53
## initial  value 12079.285688 
## final  value 11906.463123 
## converged
## # weights:  157
## initial  value 11960.152086 
## final  value 11906.463123 
## converged
## # weights:  261
## initial  value 11913.996327 
## final  value 11906.463123 
## converged
## # weights:  53
## initial  value 14074.636587 
## iter  10 value 11899.057611
## iter  20 value 11847.861975
## iter  30 value 11764.361376
## iter  40 value 11043.836462
## iter  50 value 10215.926309
## iter  60 value 9939.773030
## iter  70 value 9733.401441
## iter  80 value 9415.398494
## iter  90 value 9246.370891
## iter 100 value 9136.333166
## final  value 9136.333166 
## stopped after 100 iterations
## # weights:  157
## initial  value 11960.938163 
## iter  10 value 11906.396833
## iter  20 value 11652.315700
## iter  30 value 10653.528466
## iter  40 value 8701.192865
## iter  50 value 8034.603201
## iter  60 value 7397.186456
## iter  70 value 7194.289619
## iter  80 value 7106.186297
## iter  90 value 7029.173095
## iter 100 value 6976.720457
## final  value 6976.720457 
## stopped after 100 iterations
## # weights:  261
## initial  value 11914.551547 
## iter  10 value 11906.387890
## iter  20 value 11903.793697
## iter  30 value 10787.741283
## iter  40 value 7810.993015
## iter  50 value 7507.754470
## iter  60 value 7420.253957
## iter  70 value 7359.758160
## iter  80 value 7247.058728
## iter  90 value 7107.692996
## iter 100 value 7059.041260
## final  value 7059.041260 
## stopped after 100 iterations
## # weights:  53
## initial  value 12760.450425 
## final  value 11906.464025 
## converged
## # weights:  157
## initial  value 12505.888769 
## final  value 11906.465445 
## converged
## # weights:  261
## initial  value 12019.640189 
## final  value 11906.467178 
## converged
## # weights:  53
## initial  value 11942.344805 
## final  value 11905.939184 
## converged
## # weights:  157
## initial  value 12531.698931 
## final  value 11905.939184 
## converged
## # weights:  261
## initial  value 12224.460111 
## final  value 11905.939184 
## converged
## # weights:  53
## initial  value 12434.194716 
## iter  10 value 11905.909868
## iter  20 value 9488.493950
## iter  30 value 7769.756809
## iter  40 value 7312.110180
## iter  50 value 7225.267161
## iter  60 value 7195.331725
## iter  70 value 7163.362029
## iter  80 value 7149.689917
## iter  90 value 7117.908482
## iter 100 value 7075.182650
## final  value 7075.182650 
## stopped after 100 iterations
## # weights:  157
## initial  value 11913.858795 
## iter  10 value 11905.948857
## iter  20 value 11905.767062
## iter  30 value 11893.937597
## iter  40 value 11707.051620
## iter  50 value 11254.281196
## iter  60 value 10945.389072
## iter  70 value 10562.496001
## iter  80 value 9209.081686
## iter  90 value 7664.043475
## iter 100 value 7567.794581
## final  value 7567.794581 
## stopped after 100 iterations
## # weights:  261
## initial  value 11982.591215 
## iter  10 value 11905.768213
## iter  20 value 11833.756355
## iter  30 value 10651.051954
## iter  40 value 9677.835071
## iter  50 value 7588.213045
## iter  60 value 7440.733511
## iter  70 value 7318.085141
## iter  80 value 7309.734959
## iter  90 value 7299.484355
## iter 100 value 7238.072140
## final  value 7238.072140 
## stopped after 100 iterations
## # weights:  53
## initial  value 12699.862470 
## final  value 11905.940016 
## converged
## # weights:  157
## initial  value 11955.721752 
## final  value 11905.941522 
## converged
## # weights:  261
## initial  value 11907.826694 
## final  value 11905.943384 
## converged
## # weights:  53
## initial  value 11907.718187 
## final  value 11905.524216 
## converged
## # weights:  157
## initial  value 12179.034698 
## final  value 11905.524216 
## converged
## # weights:  261
## initial  value 13368.748053 
## final  value 11905.524216 
## converged
## # weights:  53
## initial  value 12290.819165 
## iter  10 value 11905.502819
## iter  20 value 11852.180841
## iter  30 value 10593.841336
## iter  40 value 9226.494186
## iter  50 value 8389.023618
## iter  60 value 8275.085316
## iter  70 value 7320.231438
## iter  80 value 7162.964814
## iter  90 value 7120.400329
## iter 100 value 7111.601812
## final  value 7111.601812 
## stopped after 100 iterations
## # weights:  157
## initial  value 14374.646606 
## iter  10 value 11905.036721
## iter  20 value 11904.428434
## iter  30 value 11532.203209
## iter  40 value 10560.325079
## iter  50 value 10235.116345
## iter  60 value 10088.095662
## iter  70 value 9724.776028
## iter  80 value 9220.660287
## iter  90 value 8459.680058
## iter 100 value 7702.683499
## final  value 7702.683499 
## stopped after 100 iterations
## # weights:  261
## initial  value 12777.988057 
## iter  10 value 11905.192425
## iter  20 value 11569.632660
## iter  30 value 10780.171019
## iter  40 value 8448.906519
## iter  50 value 7631.108172
## iter  60 value 7523.133813
## iter  70 value 7371.167838
## iter  80 value 7142.157696
## iter  90 value 7058.557071
## iter 100 value 6991.046694
## final  value 6991.046694 
## stopped after 100 iterations
## # weights:  53
## initial  value 15146.840094 
## final  value 11905.524988 
## converged
## # weights:  157
## initial  value 15379.578590 
## final  value 11905.526543 
## converged
## # weights:  261
## initial  value 12458.555030 
## final  value 11905.528372 
## converged
## # weights:  53
## initial  value 11921.563525 
## final  value 11905.205650 
## converged
## # weights:  157
## initial  value 12814.629987 
## final  value 11905.205650 
## converged
## # weights:  261
## initial  value 12575.735968 
## final  value 11905.205650 
## converged
## # weights:  53
## initial  value 11907.804027 
## iter  10 value 11905.208334
## final  value 11905.205719 
## converged
## # weights:  157
## initial  value 12703.677703 
## iter  10 value 11905.214792
## iter  20 value 11904.756595
## iter  30 value 11755.847038
## iter  40 value 11741.585923
## iter  50 value 11273.738788
## iter  60 value 9909.944002
## iter  70 value 8333.144216
## iter  80 value 7604.081003
## iter  90 value 7270.397781
## iter 100 value 7099.773414
## final  value 7099.773414 
## stopped after 100 iterations
## # weights:  261
## initial  value 14894.053460 
## iter  10 value 11905.202112
## iter  20 value 11902.502354
## iter  30 value 11809.120543
## iter  40 value 10998.692213
## iter  50 value 7674.980557
## iter  60 value 7509.239638
## iter  70 value 7414.561177
## iter  80 value 7184.287863
## iter  90 value 7059.860094
## iter 100 value 6928.616002
## final  value 6928.616002 
## stopped after 100 iterations
## # weights:  53
## initial  value 11938.743487 
## final  value 11905.206494 
## converged
## # weights:  157
## initial  value 13018.214946 
## final  value 11905.207854 
## converged
## # weights:  261
## initial  value 12416.071459 
## final  value 11905.210075 
## converged
## # weights:  53
## initial  value 12238.292670 
## final  value 11906.517147 
## converged
## # weights:  157
## initial  value 13175.771246 
## final  value 11906.517147 
## converged
## # weights:  261
## initial  value 12446.614366 
## final  value 11906.517147 
## converged
## # weights:  53
## initial  value 12247.894705 
## iter  10 value 11906.264529
## iter  20 value 11853.819380
## iter  30 value 10986.062254
## iter  40 value 9762.045804
## iter  50 value 9114.067298
## iter  60 value 7935.441116
## iter  70 value 7304.253481
## iter  80 value 7130.926194
## iter  90 value 7088.658184
## iter 100 value 7045.891747
## final  value 7045.891747 
## stopped after 100 iterations
## # weights:  157
## initial  value 12671.959062 
## iter  10 value 11906.495450
## iter  20 value 11827.396148
## iter  30 value 10861.510571
## iter  40 value 7623.787755
## iter  50 value 7333.499226
## iter  60 value 7221.085899
## iter  70 value 7144.234767
## iter  80 value 7111.196619
## iter  90 value 7099.605868
## iter 100 value 7095.496964
## final  value 7095.496964 
## stopped after 100 iterations
## # weights:  261
## initial  value 11912.092092 
## iter  10 value 11904.386401
## iter  20 value 11639.150357
## iter  30 value 11218.547909
## iter  40 value 9573.034022
## iter  50 value 8857.471134
## iter  60 value 7964.101568
## iter  70 value 7394.646379
## iter  80 value 7230.914157
## iter  90 value 7157.074051
## iter 100 value 7122.122594
## final  value 7122.122594 
## stopped after 100 iterations
## # weights:  53
## initial  value 11926.825561 
## final  value 11906.518186 
## converged
## # weights:  157
## initial  value 11964.200413 
## final  value 11906.519908 
## converged
## # weights:  261
## initial  value 13516.865754 
## final  value 11906.521593 
## converged
## # weights:  53
## initial  value 13173.780303 
## final  value 11906.542761 
## converged
## # weights:  157
## initial  value 12028.729840 
## final  value 11906.542761 
## converged
## # weights:  261
## initial  value 11911.435918 
## final  value 11906.542761 
## converged
## # weights:  53
## initial  value 12189.976865 
## final  value 11906.542767 
## converged
## # weights:  157
## initial  value 12446.762420 
## iter  10 value 11906.454041
## iter  20 value 11109.957093
## iter  30 value 10825.228784
## iter  40 value 10051.067425
## iter  50 value 8972.904263
## iter  60 value 8158.014036
## iter  70 value 7684.265425
## iter  80 value 7445.718520
## iter  90 value 7208.466511
## iter 100 value 7190.107309
## final  value 7190.107309 
## stopped after 100 iterations
## # weights:  261
## initial  value 12593.155907 
## iter  10 value 11906.534736
## iter  20 value 11533.527198
## iter  30 value 10539.986797
## iter  40 value 9441.192327
## iter  50 value 7768.278947
## iter  60 value 7494.960564
## iter  70 value 7418.455492
## iter  80 value 7253.621284
## iter  90 value 7186.942531
## iter 100 value 7177.292252
## final  value 7177.292252 
## stopped after 100 iterations
## # weights:  53
## initial  value 11933.330091 
## final  value 11906.543853 
## converged
## # weights:  157
## initial  value 12765.749541 
## final  value 11906.545187 
## converged
## # weights:  261
## initial  value 18003.372264 
## final  value 11906.546693 
## converged
## # weights:  53
## initial  value 12346.444469 
## final  value 11906.852462 
## converged
## # weights:  157
## initial  value 12610.300290 
## final  value 11906.852462 
## converged
## # weights:  261
## initial  value 13921.584586 
## final  value 11906.852462 
## converged
## # weights:  53
## initial  value 12198.945572 
## iter  10 value 11875.707774
## iter  20 value 11025.097409
## iter  30 value 10107.015291
## iter  40 value 8570.447634
## iter  50 value 7653.155540
## iter  60 value 7414.704549
## iter  70 value 7272.916863
## iter  80 value 7211.995456
## iter  90 value 7172.010451
## iter 100 value 7147.086163
## final  value 7147.086163 
## stopped after 100 iterations
## # weights:  157
## initial  value 11909.520226 
## iter  10 value 11906.778866
## iter  20 value 10720.693194
## iter  30 value 9452.753810
## iter  40 value 8521.876719
## iter  50 value 8029.242070
## iter  60 value 7683.968916
## iter  70 value 7501.982754
## iter  80 value 7296.944919
## iter  90 value 7137.733178
## iter 100 value 7082.624490
## final  value 7082.624490 
## stopped after 100 iterations
## # weights:  261
## initial  value 12925.751638 
## iter  10 value 11906.730235
## iter  20 value 11704.036274
## iter  30 value 10224.258781
## iter  40 value 9412.820705
## iter  50 value 8291.407366
## iter  60 value 7546.961264
## iter  70 value 7459.750581
## iter  80 value 7386.657974
## iter  90 value 7250.654620
## iter 100 value 7095.115398
## final  value 7095.115398 
## stopped after 100 iterations
## # weights:  53
## initial  value 13099.743440 
## final  value 11906.853460 
## converged
## # weights:  157
## initial  value 12004.423032 
## final  value 11906.855157 
## converged
## # weights:  261
## initial  value 12087.399816 
## final  value 11906.856934 
## converged
## # weights:  53
## initial  value 11994.204091 
## final  value 11906.797392 
## converged
## # weights:  157
## initial  value 11906.817579 
## final  value 11906.797392 
## converged
## # weights:  261
## initial  value 12109.007571 
## final  value 11906.797392 
## converged
## # weights:  53
## initial  value 11916.475807 
## iter  10 value 11906.617712
## iter  20 value 11203.821234
## iter  30 value 9656.219210
## iter  40 value 8034.452514
## iter  50 value 7369.184651
## iter  60 value 7252.769624
## iter  70 value 7202.103433
## iter  80 value 7186.531414
## iter  90 value 7185.101899
## iter 100 value 7183.923347
## final  value 7183.923347 
## stopped after 100 iterations
## # weights:  157
## initial  value 13152.148891 
## iter  10 value 11906.800687
## iter  20 value 11868.769462
## iter  30 value 11060.168756
## iter  40 value 9373.264532
## iter  50 value 8695.223185
## iter  60 value 8244.502376
## iter  70 value 7562.893400
## iter  80 value 7353.821598
## iter  90 value 7304.901748
## iter 100 value 7254.578970
## final  value 7254.578970 
## stopped after 100 iterations
## # weights:  261
## initial  value 11993.946562 
## iter  10 value 11906.668505
## iter  20 value 11653.560645
## iter  30 value 9695.611118
## iter  40 value 7930.697720
## iter  50 value 7510.244893
## iter  60 value 7327.821621
## iter  70 value 7267.612416
## iter  80 value 7200.617422
## iter  90 value 7187.045995
## iter 100 value 7184.604900
## final  value 7184.604900 
## stopped after 100 iterations
## # weights:  53
## initial  value 12393.786764 
## final  value 11906.798089 
## converged
## # weights:  157
## initial  value 12393.746939 
## final  value 11906.799674 
## converged
## # weights:  261
## initial  value 12559.772436 
## final  value 11906.801664 
## converged
## # weights:  53
## initial  value 12631.774938 
## final  value 11906.747677 
## converged
## # weights:  157
## initial  value 12424.935807 
## final  value 11906.747677 
## converged
## # weights:  261
## initial  value 14309.974441 
## final  value 11906.747677 
## converged
## # weights:  53
## initial  value 12198.144833 
## final  value 11906.747682 
## converged
## # weights:  157
## initial  value 13687.349866 
## iter  10 value 11906.701067
## iter  20 value 11902.278350
## iter  30 value 11120.163016
## iter  40 value 9408.858230
## iter  50 value 8585.654638
## iter  60 value 7565.082349
## iter  70 value 7430.839353
## iter  80 value 7295.344377
## iter  90 value 7247.754630
## iter 100 value 7224.929875
## final  value 7224.929875 
## stopped after 100 iterations
## # weights:  261
## initial  value 12446.843443 
## final  value 11906.748087 
## converged
## # weights:  53
## initial  value 11910.496028 
## final  value 11906.748419 
## converged
## # weights:  157
## initial  value 12107.811267 
## final  value 11906.750785 
## converged
## # weights:  261
## initial  value 12023.902906 
## final  value 11906.751852 
## converged
## # weights:  53
## initial  value 11988.679355 
## final  value 11905.233478 
## converged
## # weights:  157
## initial  value 11958.257860 
## final  value 11905.233478 
## converged
## # weights:  261
## initial  value 12970.820887 
## final  value 11905.233478 
## converged
## # weights:  53
## initial  value 11932.926246 
## final  value 11905.234109 
## converged
## # weights:  157
## initial  value 12317.451499 
## iter  10 value 11905.170051
## iter  20 value 11738.398225
## iter  30 value 11231.419068
## iter  40 value 8382.671968
## iter  50 value 7507.940251
## iter  60 value 7288.198960
## iter  70 value 7210.071496
## iter  80 value 7178.355655
## iter  90 value 7173.486796
## iter 100 value 7155.956920
## final  value 7155.956920 
## stopped after 100 iterations
## # weights:  261
## initial  value 12478.025452 
## iter  10 value 11905.226339
## iter  20 value 11885.304261
## iter  30 value 10680.616548
## iter  40 value 10205.460266
## iter  50 value 8442.441808
## iter  60 value 7782.140694
## iter  70 value 7465.746995
## iter  80 value 7255.522414
## iter  90 value 7191.232962
## iter 100 value 7103.492980
## final  value 7103.492980 
## stopped after 100 iterations
## # weights:  53
## initial  value 11913.349075 
## final  value 11905.234163 
## converged
## # weights:  157
## initial  value 14141.735260 
## final  value 11905.236309 
## converged
## # weights:  261
## initial  value 12207.972586 
## final  value 11905.237821 
## converged
## # weights:  53
## initial  value 12137.857538 
## final  value 11906.770380 
## converged
## # weights:  157
## initial  value 11999.369656 
## final  value 11906.770380 
## converged
## # weights:  261
## initial  value 11919.029016 
## final  value 11906.770380 
## converged
## # weights:  53
## initial  value 12208.058546 
## final  value 11906.770388 
## converged
## # weights:  157
## initial  value 11946.247709 
## iter  10 value 11891.018005
## iter  20 value 11323.172976
## iter  30 value 10583.301615
## iter  40 value 10331.613780
## iter  50 value 8920.025815
## iter  60 value 7759.301686
## iter  70 value 7361.641406
## iter  80 value 7265.694300
## iter  90 value 7169.593424
## iter 100 value 7137.287630
## final  value 7137.287630 
## stopped after 100 iterations
## # weights:  261
## initial  value 12708.397514 
## iter  10 value 11906.712164
## iter  20 value 11802.032574
## iter  30 value 9992.531731
## iter  40 value 9078.248760
## iter  50 value 7846.234351
## iter  60 value 7418.870588
## iter  70 value 7225.832734
## iter  80 value 7151.783839
## iter  90 value 7072.161614
## iter 100 value 7044.157927
## final  value 7044.157927 
## stopped after 100 iterations
## # weights:  53
## initial  value 11907.060013 
## final  value 11906.771139 
## converged
## # weights:  157
## initial  value 13907.823722 
## final  value 11906.772729 
## converged
## # weights:  261
## initial  value 12040.427011 
## final  value 11906.774819 
## converged
## # weights:  53
## initial  value 11909.305143 
## final  value 11906.656862 
## converged
## # weights:  157
## initial  value 11906.801277 
## final  value 11906.656862 
## converged
## # weights:  261
## initial  value 14237.139511 
## final  value 11906.656862 
## converged
## # weights:  53
## initial  value 14383.209255 
## iter  10 value 11906.562382
## iter  20 value 10821.896092
## iter  30 value 9438.746887
## iter  40 value 7825.113943
## iter  50 value 7398.722152
## iter  60 value 7209.257325
## iter  70 value 7150.472450
## iter  80 value 7138.172426
## iter  90 value 7124.636937
## iter 100 value 7118.186528
## final  value 7118.186528 
## stopped after 100 iterations
## # weights:  157
## initial  value 12846.437976 
## iter  10 value 11906.606201
## iter  20 value 11869.806528
## iter  30 value 10808.431929
## iter  40 value 10089.442760
## iter  50 value 7928.782160
## iter  60 value 7528.913169
## iter  70 value 7368.543661
## iter  80 value 7319.269579
## iter  90 value 7284.970010
## iter 100 value 7193.783877
## final  value 7193.783877 
## stopped after 100 iterations
## # weights:  261
## initial  value 12333.872591 
## iter  10 value 11906.667457
## iter  20 value 11906.510611
## iter  30 value 11898.632720
## iter  40 value 10264.215099
## iter  50 value 9193.339456
## iter  60 value 8576.342368
## iter  70 value 7879.120939
## iter  80 value 7467.537496
## iter  90 value 7219.879428
## iter 100 value 7128.427135
## final  value 7128.427135 
## stopped after 100 iterations
## # weights:  53
## initial  value 12135.082821 
## final  value 11906.657556 
## converged
## # weights:  157
## initial  value 14792.591123 
## final  value 11906.659309 
## converged
## # weights:  261
## initial  value 15334.560078 
## final  value 11906.660998 
## converged
## # weights:  53
## initial  value 12058.401240 
## final  value 11906.614016 
## converged
## # weights:  157
## initial  value 11924.995615 
## final  value 11906.614016 
## converged
## # weights:  261
## initial  value 15941.377331 
## final  value 11906.614016 
## converged
## # weights:  53
## initial  value 13474.681036 
## final  value 11906.614036 
## converged
## # weights:  157
## initial  value 11910.093425 
## final  value 11906.614558 
## converged
## # weights:  261
## initial  value 12082.291085 
## iter  10 value 11906.625523
## iter  20 value 11906.524469
## iter  30 value 11868.567740
## iter  40 value 11500.553356
## iter  50 value 10631.255513
## iter  60 value 9059.798386
## iter  70 value 8119.476075
## iter  80 value 7647.045378
## iter  90 value 7384.969607
## iter 100 value 7131.727892
## final  value 7131.727892 
## stopped after 100 iterations
## # weights:  53
## initial  value 11906.643623 
## final  value 11906.614848 
## converged
## # weights:  157
## initial  value 12468.747634 
## final  value 11906.616443 
## converged
## # weights:  261
## initial  value 12289.112922 
## final  value 11906.618679 
## converged
## # weights:  53
## initial  value 13043.483062 
## final  value 11906.542761 
## converged
## # weights:  157
## initial  value 12868.308374 
## final  value 11906.542761 
## converged
## # weights:  261
## initial  value 16256.467850 
## final  value 11906.542761 
## converged
## # weights:  53
## initial  value 12768.633952 
## iter  10 value 11906.543923
## final  value 11906.542796 
## converged
## # weights:  157
## initial  value 14695.900902 
## iter  10 value 11906.479897
## iter  20 value 11867.540866
## iter  30 value 11460.572589
## iter  40 value 10373.637772
## iter  50 value 7928.117509
## iter  60 value 7545.726760
## iter  70 value 7533.172782
## iter  80 value 7400.152258
## iter  90 value 7173.022606
## iter 100 value 7087.085666
## final  value 7087.085666 
## stopped after 100 iterations
## # weights:  261
## initial  value 11970.114742 
## iter  10 value 11906.282720
## iter  20 value 11889.018748
## iter  30 value 10959.841405
## iter  40 value 9997.210037
## iter  50 value 9066.689399
## iter  60 value 7871.062676
## iter  70 value 7451.592203
## iter  80 value 7317.838483
## iter  90 value 7095.768110
## iter 100 value 6849.448508
## final  value 6849.448508 
## stopped after 100 iterations
## # weights:  53
## initial  value 11993.055710 
## final  value 11906.543452 
## converged
## # weights:  157
## initial  value 13260.669378 
## final  value 11906.545458 
## converged
## # weights:  261
## initial  value 12156.309397 
## final  value 11906.546670 
## converged
## # weights:  53
## initial  value 11909.890924 
## final  value 11906.868180 
## converged
## # weights:  157
## initial  value 14200.861108 
## final  value 11906.868180 
## converged
## # weights:  261
## initial  value 12073.207069 
## final  value 11906.868180 
## converged
## # weights:  53
## initial  value 12139.185528 
## iter  10 value 11906.573465
## iter  20 value 11113.802196
## iter  30 value 10308.140762
## iter  40 value 8384.682560
## iter  50 value 7480.557100
## iter  60 value 7373.356563
## iter  70 value 7326.319175
## iter  80 value 7287.160738
## iter  90 value 7188.210091
## iter 100 value 7120.663703
## final  value 7120.663703 
## stopped after 100 iterations
## # weights:  157
## initial  value 11939.525145 
## iter  10 value 11903.321151
## iter  20 value 11420.249871
## iter  30 value 10478.878436
## iter  40 value 8503.259206
## iter  50 value 7344.381464
## iter  60 value 7178.865463
## iter  70 value 7152.176250
## iter  80 value 7134.627879
## iter  90 value 7114.287566
## iter 100 value 7109.421197
## final  value 7109.421197 
## stopped after 100 iterations
## # weights:  261
## initial  value 12113.220175 
## final  value 11906.868957 
## converged
## # weights:  53
## initial  value 12002.140121 
## final  value 11906.869089 
## converged
## # weights:  157
## initial  value 12111.500772 
## final  value 11906.870697 
## converged
## # weights:  261
## initial  value 12481.913796 
## final  value 11906.872397 
## converged
## # weights:  53
## initial  value 12955.562951 
## final  value 11906.862591 
## converged
## # weights:  157
## initial  value 11926.543342 
## final  value 11906.862591 
## converged
## # weights:  261
## initial  value 14171.148724 
## final  value 11906.862591 
## converged
## # weights:  53
## initial  value 12343.593179 
## iter  10 value 11906.811657
## iter  20 value 11835.195685
## iter  30 value 11249.166393
## iter  40 value 8132.431282
## iter  50 value 7308.225356
## iter  60 value 7167.154073
## iter  70 value 7025.468634
## iter  80 value 6959.137751
## iter  90 value 6928.759912
## iter 100 value 6923.881761
## final  value 6923.881761 
## stopped after 100 iterations
## # weights:  157
## initial  value 12025.550112 
## iter  10 value 11904.009958
## iter  20 value 11854.869940
## iter  30 value 11312.348379
## iter  40 value 9015.939581
## iter  50 value 7389.520668
## iter  60 value 7239.016612
## iter  70 value 7173.487695
## iter  80 value 7019.213558
## iter  90 value 6961.045295
## iter 100 value 6948.179559
## final  value 6948.179559 
## stopped after 100 iterations
## # weights:  261
## initial  value 11931.344666 
## iter  10 value 11906.759531
## iter  20 value 11883.386071
## iter  30 value 11838.144178
## iter  40 value 11134.272357
## iter  50 value 10631.585484
## iter  60 value 10248.982978
## iter  70 value 8642.576053
## iter  80 value 7620.518950
## iter  90 value 7366.337422
## iter 100 value 7071.885139
## final  value 7071.885139 
## stopped after 100 iterations
## # weights:  53
## initial  value 11914.714825 
## final  value 11906.863475 
## converged
## # weights:  157
## initial  value 12694.420330 
## final  value 11906.864873 
## converged
## # weights:  261
## initial  value 12199.303674 
## final  value 11906.866931 
## converged
## # weights:  53
## initial  value 12391.946945 
## final  value 11906.311764 
## converged
## # weights:  157
## initial  value 12583.338666 
## final  value 11906.311764 
## converged
## # weights:  261
## initial  value 12346.489618 
## final  value 11906.311764 
## converged
## # weights:  53
## initial  value 12234.045338 
## iter  10 value 11906.170995
## iter  20 value 11904.844328
## iter  30 value 11710.679637
## iter  40 value 11126.666049
## iter  50 value 10570.152664
## iter  60 value 10442.631279
## iter  70 value 10213.714087
## iter  80 value 9306.175588
## iter  90 value 7707.272339
## iter 100 value 7295.717878
## final  value 7295.717878 
## stopped after 100 iterations
## # weights:  157
## initial  value 11908.986393 
## iter  10 value 11906.269507
## iter  10 value 11906.269450
## iter  20 value 11904.925954
## iter  30 value 10537.167300
## iter  40 value 8759.816028
## iter  50 value 7807.240454
## iter  60 value 7214.960445
## iter  70 value 7114.550008
## iter  80 value 7050.845185
## iter  90 value 6948.745246
## iter 100 value 6892.542127
## final  value 6892.542127 
## stopped after 100 iterations
## # weights:  261
## initial  value 12019.846559 
## iter  10 value 11906.314401
## iter  20 value 11906.074804
## iter  30 value 11072.930183
## iter  40 value 10023.614185
## iter  50 value 8248.222887
## iter  60 value 7356.468001
## iter  70 value 7181.839721
## iter  80 value 7137.070689
## iter  90 value 7119.295929
## iter 100 value 7093.917529
## final  value 7093.917529 
## stopped after 100 iterations
## # weights:  53
## initial  value 12121.578810 
## final  value 11906.312775 
## converged
## # weights:  157
## initial  value 13490.851945 
## final  value 11906.314329 
## converged
## # weights:  261
## initial  value 13472.673749 
## final  value 11906.316068 
## converged
## # weights:  53
## initial  value 11908.666194 
## final  value 11906.879357 
## converged
## # weights:  157
## initial  value 12295.941545 
## final  value 11906.879357 
## converged
## # weights:  261
## initial  value 11926.419273 
## final  value 11906.879357 
## converged
## # weights:  53
## initial  value 12107.699563 
## final  value 11906.879357 
## converged
## # weights:  157
## initial  value 11953.221183 
## iter  10 value 11906.837787
## iter  20 value 11881.883496
## iter  30 value 10927.541103
## iter  40 value 10704.517216
## iter  50 value 8190.611325
## iter  60 value 7501.097085
## iter  70 value 7365.122629
## iter  80 value 7205.268370
## iter  90 value 7144.364065
## iter 100 value 7131.820294
## final  value 7131.820294 
## stopped after 100 iterations
## # weights:  261
## initial  value 12818.088846 
## iter  10 value 11906.838267
## iter  20 value 11718.479665
## iter  30 value 11061.702156
## iter  40 value 8380.914670
## iter  50 value 7548.816411
## iter  60 value 7468.129757
## iter  70 value 7383.188811
## iter  80 value 7214.113877
## iter  90 value 6994.789583
## iter 100 value 6959.159471
## final  value 6959.159471 
## stopped after 100 iterations
## # weights:  53
## initial  value 12072.390224 
## final  value 11906.880250 
## converged
## # weights:  157
## initial  value 12295.861327 
## final  value 11906.882058 
## converged
## # weights:  261
## initial  value 14427.844131 
## final  value 11906.883663 
## converged
## # weights:  53
## initial  value 11906.953600 
## final  value 11906.490601 
## converged
## # weights:  157
## initial  value 14909.172882 
## final  value 11906.490601 
## converged
## # weights:  261
## initial  value 13484.895920 
## final  value 11906.490601 
## converged
## # weights:  53
## initial  value 12084.714383 
## final  value 11906.490619 
## converged
## # weights:  157
## initial  value 12194.364449 
## final  value 11906.490618 
## converged
## # weights:  261
## initial  value 11923.347570 
## iter  10 value 11906.352493
## iter  20 value 11873.659849
## iter  30 value 10834.256922
## iter  40 value 8178.244065
## iter  50 value 7780.471769
## iter  60 value 7654.955001
## iter  70 value 7577.982627
## iter  80 value 7386.256369
## iter  90 value 7145.879407
## iter 100 value 6840.442445
## final  value 6840.442445 
## stopped after 100 iterations
## # weights:  53
## initial  value 12525.929612 
## final  value 11906.491398 
## converged
## # weights:  157
## initial  value 13645.829819 
## final  value 11906.493223 
## converged
## # weights:  261
## initial  value 13975.693125 
## final  value 11906.494811 
## converged
## # weights:  53
## initial  value 12759.360209 
## final  value 11906.731377 
## converged
## # weights:  157
## initial  value 12417.731575 
## final  value 11906.731377 
## converged
## # weights:  261
## initial  value 14642.443820 
## final  value 11906.731377 
## converged
## # weights:  53
## initial  value 11908.239566 
## final  value 11906.731402 
## converged
## # weights:  157
## initial  value 13350.412465 
## iter  10 value 11906.633968
## iter  20 value 11095.954716
## iter  30 value 9683.448921
## iter  40 value 7686.861074
## iter  50 value 7578.406138
## iter  60 value 7492.783669
## iter  70 value 7323.950699
## iter  80 value 7110.543725
## iter  90 value 7044.011881
## iter 100 value 7022.457694
## final  value 7022.457694 
## stopped after 100 iterations
## # weights:  261
## initial  value 13934.814064 
## iter  10 value 11906.662805
## iter  20 value 11705.286812
## iter  30 value 11094.788359
## iter  40 value 10193.019569
## iter  50 value 7999.167677
## iter  60 value 7625.014795
## iter  70 value 7423.757480
## iter  80 value 7210.594483
## iter  90 value 7122.789138
## iter 100 value 7093.408667
## final  value 7093.408667 
## stopped after 100 iterations
## # weights:  53
## initial  value 11907.036672 
## final  value 11906.732126 
## converged
## # weights:  157
## initial  value 12848.321059 
## final  value 11906.734001 
## converged
## # weights:  261
## initial  value 11939.407944 
## final  value 11906.735494 
## converged
## # weights:  53
## initial  value 12772.016621 
## final  value 11906.739643 
## converged
## # weights:  157
## initial  value 11977.295581 
## final  value 11906.739643 
## converged
## # weights:  261
## initial  value 16922.111689 
## final  value 11906.739643 
## converged
## # weights:  53
## initial  value 12240.183266 
## iter  10 value 11906.618039
## iter  20 value 11886.180486
## iter  30 value 9534.564860
## iter  40 value 8758.934413
## iter  50 value 7794.743970
## iter  60 value 7407.167935
## iter  70 value 7316.817968
## iter  80 value 7303.563022
## iter  90 value 7261.626278
## iter 100 value 7214.808214
## final  value 7214.808214 
## stopped after 100 iterations
## # weights:  157
## initial  value 12013.364729 
## iter  10 value 11906.743020
## iter  20 value 11906.651000
## iter  30 value 11905.115068
## iter  40 value 11817.795528
## iter  50 value 11257.090431
## iter  60 value 10591.023171
## iter  70 value 9508.164516
## iter  80 value 8849.861726
## iter  90 value 8706.839109
## iter 100 value 8241.027224
## final  value 8241.027224 
## stopped after 100 iterations
## # weights:  261
## initial  value 12400.250323 
## iter  10 value 11905.069719
## iter  20 value 11375.646203
## iter  30 value 10915.460070
## iter  40 value 9608.289430
## iter  50 value 7902.581535
## iter  60 value 7405.309213
## iter  70 value 7321.362692
## iter  80 value 7287.334600
## iter  90 value 7220.346597
## iter 100 value 7214.996224
## final  value 7214.996224 
## stopped after 100 iterations
## # weights:  53
## initial  value 12846.538539 
## final  value 11906.740468 
## converged
## # weights:  157
## initial  value 14624.893275 
## final  value 11906.742220 
## converged
## # weights:  261
## initial  value 12253.071724 
## final  value 11906.744199 
## converged
## # weights:  261
## initial  value 13046.092906 
## iter  10 value 11906.618773
## iter  20 value 11848.034988
## iter  30 value 11662.315048
## iter  40 value 11229.716722
## iter  50 value 11108.793207
## iter  60 value 11098.992152
## iter  70 value 11081.423604
## iter  80 value 10980.371714
## iter  90 value 9280.964308
## iter 100 value 8091.121890
## final  value 8091.121890 
## stopped after 100 iterations
# Print caret's bootstrap resampling summary for the fitted neural network
# (method = "nnet"): accuracy/kappa over the (size, decay) tuning grid.
print(nn)
## Neural Network 
## 
## 17178 samples
##    20 predictor
##     2 classes: '0', '1' 
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 17178, 17178, 17178, 17178, 17178, 17178, ... 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   1     0e+00  0.4977082  0.0000000
##   1     1e-04  0.4977082  0.0000000
##   1     1e-01  0.6857579  0.3743859
##   3     0e+00  0.4977082  0.0000000
##   3     1e-04  0.4977082  0.0000000
##   3     1e-01  0.8117347  0.6247934
##   5     0e+00  0.4977082  0.0000000
##   5     1e-04  0.4977082  0.0000000
##   5     1e-01  0.8123242  0.6249784
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 5 and decay = 0.1.
# Draw the network architecture diagram (NeuralNetTools::plotnet).
plotnet(nn)

# --- In-sample (training) performance ---
#train
nnpredtrain <- predict(nn, train_data)
# Confusion matrix: rows = predicted class, columns = observed class `y`.
resulttrainnn <-table(predicted=nnpredtrain,true=train_data$y)
resulttrainnn
##          true
## predicted    0    1
##         0 7429 1874
##         1 1194 6681
# Overall accuracy = sum of diagonal (correct) cells / total observations.
acctrainnn =sum(diag(resulttrainnn))/sum(resulttrainnn)
acctrainnn
## [1] 0.8213995
# --- Out-of-sample (test) performance, same steps on the hold-out set ---
#test
nnpredtest <- predict(nn, test_data)
resulttestnn <-table(predicted=nnpredtest,true=test_data$y)
resulttestnn
##          true
## predicted    0    1
##         0 7378 1772
##         1 1244 6782
acctestnn =sum(diag(resulttestnn))/sum(resulttestnn)
acctestnn
## [1] 0.8244061
# Save the model plot to a high-resolution PNG.
# NOTE(review): plot() on a caret `train` object draws the resampling/tuning
# profile, not the network diagram — confirm this is the intended figure.
png("nn.png",height=2500, width=3000) 
plot(nn) 
dev.off()
## quartz_off_screen 
##                 2
## quartz_off_screen 
##                 2
#AUC-ROC curve

#par(mfrow=c(1,2))
#pred5 <- prediction(pred.train.nn, train_data$y) 
#perf5 <- performance(pred5,"tpr","fpr")
#plot(perf5, main = "ROC for NN with training data", col='darkslategray3')
#abline(0,1)
#pred6 <- prediction(preds.test.nn, test_data$y) 
#perf6 <- performance(pred6,"tpr","fpr")
#plot(perf6, main = "ROC for NN with test data", col='darkslategray3')
#abline(0,1)

#AUC_NN_train <- auc(roc(train_data$y, pred.train.nn))
#AUC_NN_train # Train


#=============================================================

#Random Forest
library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:rattle':
## 
##     importance
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin
## The following object is masked from 'package:dplyr':
## 
##     combine
# Fit a classification forest of 100 trees on all predictors; importance=TRUE
# stores permutation variable-importance measures for later inspection.
RF.model <- randomForest(y~., data=train_data, ntree=100, importance=TRUE)
# Printing the fit reports the out-of-bag (OOB) error — an honest internal
# estimate of generalization error (13.42% here).
RF.model
## 
## Call:
##  randomForest(formula = y ~ ., data = train_data, ntree = 100,      importance = TRUE) 
##                Type of random forest: classification
##                      Number of trees: 100
## No. of variables tried at each split: 4
## 
##         OOB estimate of  error rate: 13.42%
## Confusion matrix:
##      0    1 class.error
## 0 7653  970   0.1124899
## 1 1336 7219   0.1561660
summary(RF.model)
##                 Length Class  Mode     
## call                5  -none- call     
## type                1  -none- character
## predicted       17178  factor numeric  
## err.rate          300  -none- numeric  
## confusion           6  -none- numeric  
## votes           34356  matrix numeric  
## oob.times       17178  -none- numeric  
## classes             2  -none- character
## importance         80  -none- numeric  
## importanceSD       60  -none- numeric  
## localImportance     0  -none- NULL     
## proximity           0  -none- NULL     
## ntree               1  -none- numeric  
## mtry                1  -none- numeric  
## forest             14  -none- list     
## y               17178  factor numeric  
## test                0  -none- NULL     
## inbag               0  -none- NULL     
## terms               3  terms  call
#Next we display an error plot of the random forest model:
plot(RF.model)

# Resubstitution check: predicting on the same data the forest was trained on.
# Expect near-perfect accuracy; the OOB rate above is the realistic estimate.
RF.predict.train <- predict(RF.model, newdata = train_data)
RF.train.cm <- as.matrix(table(Actual1 = train_data$y, Predicted1 = RF.predict.train))
RF.train.cm
##        Predicted1
## Actual1    0    1
##       0 8622    1
##       1    0 8555
# Accuracy = diagonal (correct) counts / total.
accuracy_train_rf=sum(diag(RF.train.cm))/sum(RF.train.cm)
accuracy_train_rf
## [1] 0.9999418
# knitr::kable renders the test confusion matrix as a markdown table.
library(knitr)
RF.predict <- predict(RF.model, newdata = test_data)
RF.cm <- as.matrix(table(Actual = test_data$y, Predicted = RF.predict))
RF.cm
##       Predicted
## Actual    0    1
##      0 7658  964
##      1 1234 7320
kable(RF.cm, caption = "Random Forest Test Confusion Matrix")
## Random Forest Test Confusion Matrix
## 0 1
## 0 7658 964
## 1 1234 7320
# Test-set accuracy for the random forest.
accuracy_test_rf=sum(diag(RF.cm))/sum(RF.cm)
accuracy_test_rf
## [1] 0.8720307
#Above we computed the accuracy on the training and test datasets and we see that it is 99.99% and 87.20%, respectively.
#The "out of sample" error is 12.80% and is in rough agreement with the OOB error of 13.42%:

#library(randomForestExplainer)
#explain_forest(RF.model, interactions = TRUE, data = train_data)

#AUC-ROC curve

#RFROC.train<-roc(train_data$y,RF.predict.train)
#RFAUC.train<-RFROC.train$auc

#RFROC.test<-roc(test_data$y,RF.predict)
#RFAUC.test<-RFROC.test$auc

#=============================================

#Causal Inference part

## Conditional Inference Tree
library(party)
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## 
## Attaching package: 'modeltools'
## The following object is masked from 'package:car':
## 
##     Predict
## Loading required package: strucchange
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
## 
## Attaching package: 'strucchange'
## The following object is masked from 'package:stringr':
## 
##     boundary
# Fix the RNG so the (randomized) tree-growing procedure is reproducible.
set.seed(1985)

#model training
# Fit a conditional inference tree predicting y from all remaining columns;
# maxsurrogate=3 lets each split keep up to 3 surrogate splits for missing data.
# NOTE(review): the printed Inputs list below includes a `pred` column, i.e. an
# earlier model's predictions are used as a predictor here — confirm this
# "stacking" is intentional and does not leak the outcome.
ctree_model =  ctree(y ~ .,data = train_data,controls=ctree_control(maxsurrogate=3))
ctree_model
## 
##   Conditional inference tree with 30 terminal nodes
## 
## Response:  y 
## Inputs:  age, job, marital, education, default, housing, loan, contact, month, day_of_week, campaign, pdays, previous, poutcome, emp.var.rate, cons.price.idx, cons.conf.idx, euribor3m, nr.employed, pred 
## Number of observations:  17178 
## 
## 1) pred == {0}; criterion = 1, statistic = 7935.052
##   2) month == {apr, mar, oct}; criterion = 1, statistic = 378.34
##     3) day_of_week == {fri, mon}; criterion = 1, statistic = 79.423
##       4) month == {mar, oct}; criterion = 1, statistic = 40.916
##         5)*  weights = 23 
##       4) month == {apr}
##         6) marital == {single}; criterion = 0.998, statistic = 33.939
##           7) previous <= -0.08272794; criterion = 0.991, statistic = 12.384
##             8)*  weights = 23 
##           7) previous > -0.08272794
##             9)*  weights = 82 
##         6) marital == {divorced, married}
##           10) job == {admin., blue-collar, management, services, student, technician, unemployed}; criterion = 1, statistic = 44.446
##             11)*  weights = 212 
##           10) job == {entrepreneur, housemaid, retired, self-employed}
##             12)*  weights = 35 
##     3) day_of_week == {thu, tue, wed}
##       13) default == {refuse2disclose}; criterion = 1, statistic = 43.372
##         14) nr.employed <= 5119.292; criterion = 0.997, statistic = 14.372
##           15)*  weights = 25 
##         14) nr.employed > 5119.292
##           16)*  weights = 10 
##       13) default == {no}
##         17) job == {blue-collar, entrepreneur, management, services, unemployed}; criterion = 0.997, statistic = 34.856
##           18)*  weights = 144 
##         17) job == {admin., housemaid, retired, self-employed, student, technician}
##           19) age <= 51.2425; criterion = 0.99, statistic = 15.028
##             20)*  weights = 244 
##           19) age > 51.2425
##             21)*  weights = 56 
##   2) month == {aug, dec, jul, jun, may, nov}
##     22) contact == {telephone}; criterion = 1, statistic = 131.48
##       23) month == {aug, nov}; criterion = 1, statistic = 40.708
##         24)*  weights = 141 
##       23) month == {dec, jul, jun, may}
##         25)*  weights = 3482 
##     22) contact == {cellular}
##       26) month == {aug, dec, nov}; criterion = 1, statistic = 59.357
##         27)*  weights = 2311 
##       26) month == {jul, jun, may}
##         28) month == {jul}; criterion = 0.99, statistic = 17.539
##           29)*  weights = 1497 
##         28) month == {jun, may}
##           30) nr.employed <= 5141.647; criterion = 1, statistic = 24.29
##             31)*  weights = 1382 
##           30) nr.employed > 5141.647
##             32) nr.employed <= 5162.049; criterion = 0.973, statistic = 10.268
##               33)*  weights = 83 
##             32) nr.employed > 5162.049
##               34) housing == {no}; criterion = 0.988, statistic = 16.054
##                 35)*  weights = 17 
##               34) housing == {refuse2disclose, yes}
##                 36)*  weights = 13 
## 1) pred == {1}
##   37) month == {aug, jul, jun, may, nov, sep}; criterion = 1, statistic = 91.023
##     38) poutcome == {failure, nonexistent}; criterion = 1, statistic = 47.376
##       39) cons.conf.idx <= -33.11439; criterion = 1, statistic = 39.937
##         40) contact == {telephone}; criterion = 1, statistic = 51.815
##           41) euribor3m <= 1.979441; criterion = 1, statistic = 43.52
##             42)*  weights = 165 
##           41) euribor3m > 1.979441
##             43)*  weights = 710 
##         40) contact == {cellular}
##           44) month == {aug, jul, may, nov, sep}; criterion = 0.967, statistic = 19.329
##             45)*  weights = 2601 
##           44) month == {jun}
##             46)*  weights = 621 
##       39) cons.conf.idx > -33.11439
##         47) euribor3m <= 1.61896; criterion = 0.979, statistic = 12.581
##           48) cons.conf.idx <= -28.11941; criterion = 0.999, statistic = 15.761
##             49) euribor3m <= -0.313062; criterion = 0.995, statistic = 13.275
##               50)*  weights = 43 
##             49) euribor3m > -0.313062
##               51)*  weights = 408 
##           48) cons.conf.idx > -28.11941
##             52)*  weights = 96 
##         47) euribor3m > 1.61896
##           53)*  weights = 185 
##     38) poutcome == {success}
##       54) nr.employed <= 5101.956; criterion = 1, statistic = 23.066
##         55)*  weights = 674 
##       54) nr.employed > 5101.956
##         56)*  weights = 135 
##   37) month == {apr, dec, mar, oct}
##     57) campaign <= 4.274889; criterion = 0.975, statistic = 14.33
##       58)*  weights = 1711 
##     57) campaign > 4.274889
##       59)*  weights = 49
# Visualize the fitted conditional inference tree.
plot(ctree_model, main = "Conditional Inference Tree for Customers' Responses")

# In-sample (training) predictions from the fitted tree.
# NOTE(review): assumes predict() on a ctree returns class labels here
# (factor), consistent with the 0/1 confusion matrix below — confirm.
train_data$pred <- predict(ctree_model, train_data)

# Training confusion matrix.
# caret::confusionMatrix(data, reference) expects the *predictions* as the
# first argument and the observed classes as the second; the original call
# had them swapped, which flips Sensitivity/Specificity and Pos/Neg Pred
# Value in the printed output (Accuracy and Kappa are unaffected).
# NOTE(review): requires library(caret), which is not loaded in the visible
# header — presumably loaded earlier in the file; verify.
confusionMatrix(data = factor(train_data$pred), reference = train_data$y)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    0    1
##          0 7635  988
##          1 1651 6904
##                                           
##                Accuracy : 0.8464          
##                  95% CI : (0.8409, 0.8517)
##     No Information Rate : 0.5406          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.6926          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.8222          
##             Specificity : 0.8748          
##          Pos Pred Value : 0.8854          
##          Neg Pred Value : 0.8070          
##              Prevalence : 0.5406          
##          Detection Rate : 0.4445          
##    Detection Prevalence : 0.5020          
##       Balanced Accuracy : 0.8485          
##                                           
##        'Positive' Class : 0               
## 
# Out-of-sample (test) predictions from the fitted tree.
test_data$pred <- predict(ctree_model, test_data)

# Test confusion matrix.
# caret::confusionMatrix(data, reference) expects the *predictions* as the
# first argument and the observed classes as the second; the original call
# had them swapped, which flips Sensitivity/Specificity and Pos/Neg Pred
# Value in the printed output (Accuracy and Kappa are unaffected).
confusionMatrix(data = factor(test_data$pred), reference = test_data$y)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    0    1
##          0 7552 1070
##          1 1578 6976
##                                           
##                Accuracy : 0.8458          
##                  95% CI : (0.8403, 0.8512)
##     No Information Rate : 0.5316          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.6916          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.8272          
##             Specificity : 0.8670          
##          Pos Pred Value : 0.8759          
##          Neg Pred Value : 0.8155          
##              Prevalence : 0.5316          
##          Detection Rate : 0.4397          
##    Detection Prevalence : 0.5020          
##       Balanced Accuracy : 0.8471          
##                                           
##        'Positive' Class : 0               
##